import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.nio.charset.Charset;
/**
* JSoup Hello World
*
* Created by vedenin on 16.01.16.
*/
public class URLDownloadTests {
private final static String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36";
private static void initHTTPSDownload() throws Exception {
// Create a new trust manager that trust all certificates
TrustManager[] trustAllCerts = new TrustManager[]{
new X509TrustManager() {
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return null;
}
public void checkClientTrusted(
java.security.cert.X509Certificate[] certs, String authType) {
}
public void checkServerTrusted(
java.security.cert.X509Certificate[] certs, String authType) {
}
}
};
// Activate the new trust manager
try {
SSLContext sc = SSLContext.getInstance("SSL");
sc.init(null, trustAllCerts, new java.security.SecureRandom());
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
} catch (Exception e) {
System.out.print(e.getMessage());
}
}
private static String testJsoup(String url) throws Exception {
return Jsoup.connect(url).userAgent(USER_AGENT).cookie("auth", "token")
.timeout(30000).get().html();
}
private static String testJsoupHeadlines(String url) throws Exception {
Document doc = Jsoup.connect(url).userAgent(USER_AGENT).cookie("auth", "token")
.timeout(30000).get();
Elements newsHeadlines = doc.select("#mp-itn b a");
return newsHeadlines.html();
}
private static void testHtmlParser(String url) throws Exception {
Document doc = Jsoup.connect(url).userAgent(USER_AGENT).cookie("auth", "token")
.timeout(30000).get();
Charset charset = doc.charset();
System.out.println("charset = " + charset);
System.out.println("location = " + doc.location());
System.out.println("nodeName = " + doc.nodeName());
Document.OutputSettings outputSettings = doc.outputSettings();
System.out.println("charset = " + outputSettings.charset());
System.out.println("indentAmount = " + outputSettings.indentAmount());
System.out.println("syntax = " + outputSettings.syntax());
System.out.println("escapeMode = " + outputSettings.escapeMode());
System.out.println("prettyPrint = " + outputSettings.prettyPrint());
System.out.println("outline = " + outputSettings.outline());
System.out.println("title = " + doc.title());
System.out.println("baseUri = " + doc.baseUri());
Element head = doc.head();
Elements children = head.children();
for(Element child: children) {
System.out.print(child.tag().getName() + " : ");
System.out.println(child);
}
printElements(doc.body().children());
}
private static void printElements(Elements children) {
for(Element child: children) {
if(!child.text().isEmpty()) {
System.out.print(child.tag().getName() + " : ");
System.out.println(child.text());
}
printElements(child.children());
}
}
public static void main(String[] s) throws Exception {
initHTTPSDownload();
String wikipedia = testJsoup("http://en.wikipedia.org/");
System.out.println(wikipedia.length()); // print something about 70694
String headlines = testJsoupHeadlines("http://en.wikipedia.org/");
System.out.println(headlines);
String stackoverflow = testJsoup("http://stackoverflow.com/");
System.out.println(stackoverflow.length()); // print something about 70694
testHtmlParser("http://stackoverflow.com/");
}
}